#!/usr/bin/perl -w
#
# Copyright (C) 2011  Pace Plc
# All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# - Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# - Neither the names of the copyright holders nor the names of their
#   contributors may be used to endorse or promote products derived from this
#   software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# TR-069 data model parse script
#
# Parses antiword DocBook XML output, generated by:
#
# antiword -x db data-model.doc >data-model.xdb
#
# (where data-model.doc is a DSLF TR or WT that defines a TR-069 data model
# using the traditional Word tables).
#
# Searches for "tables of interest", meaning data model definitions,
# notify requirements, and profile definitions.
#
# Generates XML output compliant with the CWMP data model XML Schema.
#
# See further documentation at the end of the file.

# XXX currently using antiword-0.37, which extracts footnotes as <footnote>
#     but (a) doesn't seem to do it reliably, and (b) the script doesn't handle
#     them, so the text is included literally

# XXX reverted to antiword-0.36, which doesn't do footnotes but is preferable!

# XXX why aren't profiles checked here rather than in report.pl? all possible
#     validity checks should be made here really

use strict;
use Data::Dumper;
use Getopt::Long;
use Pod::Usage;
use XML::LibXML;

use charnames ':full';
binmode STDOUT, ':utf8';
binmode STDERR, ':utf8';

# cwmp-datamodel and cwmp-datamodel-report schema versions (the latter will
# never change from 0-1)
my $dm_version = "1-4";
my $dmr_version = "0-1";

# command-line options
my $components = 0;
my $define = {name => 'unknown', version => '1.0'};
my $device = 0;
my $dmr;
my $help = 0;
my $keys = '';
my $maxnest = 1;
my $nomodels = 0;
my $noobjects = 0;
my $nooutput = 0;
my $noparameters = 0;
my $noprofiles = 0;
my $notypes = 0;
my $oldenum = 0;
my $oldprofile = 0;
my $oldsyntax = 0;
my $pedantic;
my $rootmodel = 0;
my $spec = 'unknown';
my $verbose;
GetOptions('components' => \$components,
	   'define=s' => $define,
	   'device' => \$device,
           'dmr' => \$dmr,
	   'help' => \$help,
	   'keys:s' => \$keys,
	   'maxnest=i' => \$maxnest,
	   'nomodels' => \$nomodels,
	   'noobjects' => \$noobjects,
	   'nooutput' => \$nooutput,
	   'noparameters' => \$noparameters,
           'noprofiles' => \$noprofiles,
	   'notypes' => \$notypes,
           'oldenum' => \$oldenum,
           'oldsyntax' => \$oldsyntax,
           'oldprofile' => \$oldprofile,
	   'pedantic:i' => \$pedantic,
	   'rootmodel' => \$rootmodel,
           'spec:s' => \$spec,
	   'verbose:i' => \$verbose) or pod2usage(2);
pod2usage(1) if $help;

$dmr = 1 if defined($dmr) and !$dmr;
$dmr = 0 unless defined($dmr);

$pedantic = 1 if defined($pedantic) and !$pedantic;
$pedantic = 0 unless defined($pedantic);

$verbose = 1 if defined($verbose) and !$verbose;
$verbose = 0 unless defined($verbose);

# if spec isn't a URN, prefix boiler-plate
$spec = 'urn:broadband-forum-org:' . $spec unless $spec =~ /^urn:/;

# replace define strings of the form 'a+b+c' with arrays of the form
# ['a', 'b', 'c']
foreach my $key (keys %$define) {
    my @vals = split '\+', $define->{$key};
    $define->{$key} = \@vals;
}

# if keys file was specified (it's an XML file that maps UPnP service names
# to table names, indices and keys), parse it (returns a convenient hash)
$keys = parse_keys_file($keys);

# parse files specified on the command line
# XXX things don't really work if there is more than one file
my $current_model = $define->{name}[0];
my $current_version = $define->{version}[0];
my $current_object;
my $current_parameter;
foreach my $file (@ARGV) {
    parse_file($file);
}

# That's the end of the main program; all the rest is subroutines
my $inform_map = {};
my $notify_map = {};
my $output_services = {};
my $bibref_ids = {};
my $bibref_map = {};
my $components_map = {};

# output multi-line string to stdout, handling indentation
sub output
{
    my ($indent, $lines) = @_;

    return if $nooutput;

    foreach my $line (split /\n/, $lines) {
        print '  ' x $indent, $line, "\n";
	$indent = 0;
    }
}

# parse an XML file generated by running antiword on a TR or WT 
sub parse_file
{
    my ($file)= @_;

    print STDERR "parsing file $file\n" if $verbose;

    # get rid of the DOCTYPE line (causes problems, especially when no network
    # is available)
    # XXX there may be a way of telling the XML parser to ignore it?
    my $string = remove_doctype($file);

    # parse the resulting XML
    my $parser = XML::LibXML->new();
    my $tree = $parser->parse_string($string);
    my $root = $tree->getDocumentElement;

    # start of XML
    my $quote = $dmr ? qq{} : qq{\"};
    output 0, qq{<?xml version="1.0" encoding="UTF-8"?>};
    output 0, qq{<dm:document xmlns:dm="urn:broadband-forum-org:cwmp:datamodel-$dm_version"};
    output 0, qq{             xmlns:dmr="urn:broadband-forum-org:cwmp:datamodel-report-$dmr_version"} if $dmr;
    output 0, qq{             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"};
    output 0, qq{             xsi:schemaLocation=\"urn:broadband-forum-org:cwmp:datamodel-${dm_version} http://www.broadband-forum.org/cwmp/cwmp-datamodel-${dm_version}.xsd$quote};
    output 0, qq{                                 urn:broadband-forum-org:cwmp:datamodel-report-${dmr_version} http://www.broadband-forum.org/cwmp/cwmp-datamodel-report.xsd\"} if $dmr;
    output 0, qq{             spec="$spec" file="$file">};

    # always define or import standard data types
    # XXX need to use correct versions (check 39 versus 49 for IPv6 addresses)
    if (!$notypes && $current_model eq 'Device' && $current_version eq '1.0') {
        output 1, qq{<dataType name="IPAddress">};
        output 2, qq{<description>All IPv4 addresses and subnet masks are represented as strings in IPv4 dotted-decimal notation.  All IPv6 addresses and subnet masks MUST be represented using any of the 3 standard textual representations as defined in RFC 3513 {{bibref|RFC3513}}, sections 2.2.1, 2.2.2 and 2.2.3.  Both lower-case and upper-case letters can be used.  Use of the lower-case letters is RECOMMENDED.  Examples of valid IPv6 address textual representations:
* 1080:0:0:800:ba98:3210:11aa:12dd
* 1080::800:ba98:3210:11aa:12dd
* 0:0:0:0:0:0:13.1.68.3
Unspecified or inapplicable IP addresses and subnet masks MUST be represented as empty strings unless otherwise specified by the parameter definition.</description>};
        output 2, qq{<string>};
        output 3, qq{<size maxLength="39"/>};
        output 3, qq{<pattern value="">};
        output 4, qq{<description>Empty Pattern.</description>};
        output 3, qq{</pattern>};
        output 3, qq{<pattern value="(\\d{1,3}\\.){3}\\d{1,3}">};
        output 4, qq{<description>IPv4 Pattern - doesn\'t limit values to 255.</description>};
        output 3, qq{</pattern>};
        output 3, qq{<pattern value="([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}">};
        output 4, qq{<description>IPv6 Pattern - doesn\'t support compression to "::".</description>};
        output 3, qq{</pattern>};
        output 2, qq{</string>};
        output 1, qq{</dataType>};

        output 1, qq{<dataType name="MACAddress">};
        output 2, qq{<description>All MAC addresses are represented as strings of 12 hexadecimal digits (digits 0-9, letters A-F or a-f) displayed as six pairs of digits separated by colons.  Unspecified or inapplicable MAC addresses MUST be represented as empty strings unless otherwise specified by the parameter definition.</description>};
        output 2, qq{<string>};
        output 3, qq{<size maxLength="17"/>};
        output 3, qq{<pattern value="([0-9A-Fa-f][0-9A-Fa-f]:){5}([0-9A-Fa-f][0-9A-Fa-f])"></pattern>};
        output 2, qq{</string>};
        output 1, qq{</dataType>};
    }

    # XXX it's a bit more complicated than this; would like the imports exactly
    #     to match the PD-148 dependencies
    else {
        output 1, qq{<import file="tr-106-1-0-types.xml" spec="urn:broadband-forum-org:tr-106-1-0">};
        output 2, qq{<dataType name="IPAddress"/>};
        output 2, qq{<dataType name="MACAddress"/>};
        output 1, qq{</import>};
    }

    # find tables of interest
    my $tables = find_tables($root);

    # find and parse (bibliographic) references
    my $references = find_references($root);
    parse_references($references);

    # expect exactly one model, inform and notify table
    my $num;
    $num = @{$tables->{model}};
    my $exp = @{$define->{name}};
    print STDERR "warning: expected exactly $exp model tables (found $num)\n"
	if $num != $exp;
    $num = @{$tables->{inform}};
    print STDERR "warning: expected exactly 0-1 forced inform tables (found $num)\n"
	if $num > 1;
    $num = @{$tables->{notify}};
    print STDERR "warning: expected 0-3 active notify tables (found $num)\n"
	if $num > 3;

    # parse tables of interest, inform and notify onse first since their
    # results are used in the model one
    # XXX index is hacked for inform, notify and profile tables (need to
    #     associate them with their models)
    foreach my $table (@{$tables->{inform}}) {
	parse_inform_table($table, 0);
    }
    foreach my $table (@{$tables->{notify}}) {
	parse_notify_table($table, 0);
    }
    my $index = 0;
    unless ($nomodels) {
        foreach my $table (@{$tables->{model}}) {
            parse_model_table($table, $index, $tables->{profile});
            $index++ if @{$define->{name}} > $index + 1;
        }
    }
    if ($oldprofile) {
        unless ($noprofiles) {
            foreach my $table (@{$tables->{profile}}) {
                parse_profile_table($table, 0);
            }
        }
    }

    # check that all entries in the inform and notify tables were referenced
    unless ($nomodels) {
        while (my ($oname, $objmap) = each %$inform_map) {
            while (my ($pname, $value) = each %$objmap) {
                print STDERR "invalid parameter $oname$pname in forced ".
                    "inform table\n" unless $value == 2;
            }
        }
        while (my ($oname, $objmap) = each %$notify_map) {
            while (my ($pname, $entry) = each %$objmap) {
                print STDERR "invalid parameter $oname$pname in active ".
                    "notify table\n" unless $entry->{referenced};
            }
        }
    }

    # end of XML
    output 0, qq{</dm:document>};
}

# find the tables of interest, returning a reference to a hash of the form
# {model => [model-table-node...], inform => [inform-table-node...],
# notify => [notify-table-node...], profile => [profile-table-node...]}.
sub find_tables
{
    my ($root) = @_;

    print STDERR "finding tables of interest\n" if $verbose;

    # hash referencing tables of interest
    my $tables = {model => [], inform => [], notify => [], profile => []};

    # patterns that table captions must match (used only as pre-condition;
    # column headings must match too); matched strings (if any) are saved
    # in the hash as arg0, arg1...
    # XXX profile pattern allows multiple versions of profiles but only
    #     generates the first one (need to parse the version number from the
    #     column headings)
    # XXX have disabled the profile pattern but it doesn't work because there
    #     is currently no way of determining which model it corresponds to
    my $cappats = {
#	model => '^Table.*',
	model => '',
	inform => '^Table.*Forced Inform',
	notify => '^Table.*? (\w+) Active Notification',
#	profile => '^Table\s+\S+\W+(\w+)[\s:]*(\d*).*Profile Definition\s*(for)?\s*a?\s*(\w*)\s*:?\s*\d*\s*(and)?\s*a?n?\s*(\w*)',
        profile => ''
    };

    # patterns that (concatenated) column headings must match
    my $colpats = {
	model => '^Name.*Type.*W(rite)?.*Description.*',
	inform => '^Parameter.*',
	notify => '^Parameter.*',
	profile => '^Name.*Requirement.*'};

    # look at all para elements, looking for tables of interest (all tables
    # follow this pattern):
    #   <para>
    #     caption
    #   </para>
    #   <para>
    #     <informaltable>...</informaltable>
    #   </para>
    my $previous = {};
    my $hash = {};
    foreach my $para ($root->findnodes('.//para')) {

        # ignore paragraphs with empty content
        next unless $para->findvalue('normalize-space(.)');

	# if para contains a table, extract the column headings
	my $table = ($para->findnodes('.//informaltable'))[0];
	if ($table) {
	    my $header = ($table->findnodes('.//row[1]'))[0];
	    my $value = $header->findvalue('.//entry');
	    $value = white_strip($value, {black => 1});

	    # if previous para contained matching caption and column headings
	    # match as well, then add an entry to the list of tables
	    for my $type ('model', 'inform', 'notify', 'profile') {
		if ($previous->{$type} && $value =~ /$colpats->{$type}/i) {
		    $hash = {table => $table};
		    for my $arg (keys %{$previous}) {
			$hash->{$arg} = $previous->{$arg} if $arg =~ /^arg/;
		    }
		    push(@{$tables->{$type}}, $hash);
                    print STDERR "found $type table\n" if $verbose;
		}
	    }

	    # XXX workaround for WT-135v8.1.4, where the model table has got
	    #     split into two Word tables (would perhaps be better to use an
	    #     array of tables, since this doesn't scale to multiple tables)
	    my $entry = ($table->findnodes('.//row[2]/entry[1]'))[0];
	    if ($entry) {
		my $name = $entry->findvalue('normalize-space(.)');
		if ($name eq 'Presence') {
		    $hash->{table2} = $table;
		}
	    }
	}

	# check whether this para's caption matches and, if so, collect
	# matched values
	$previous = {};
	my $caption = $para->findvalue('normalize-space(.)');
	for my $type ('model', 'inform', 'notify', 'profile') {
	    my @match = ($caption =~ /$cappats->{$type}/i);
	    $previous->{$type} = @match;
	    for (my $i=0; $i<@match; $i++) {
		my $arg = 'arg'.$i;
		$previous->{$arg} = $match[$i];
	    }
            print STDERR "found $type table caption\n"
                if $cappats->{$type} && @match && $verbose;
	}
    }

    return $tables;
}

# find bibliographic refedfences, returning a reference to an array of textual
# (unparsed) references
sub find_references
{
    my ($root) = @_;

    print STDERR "finding (bibliographic) references\n" if $verbose;

    # pattern to match in para before ordered list that defines the references
    my $patt = 'the following (references|documents)|the reference to a document';

    # array referencing references (sic).
    my $references = [];

    # look at all para elements, looking for tables of interest (all tables
    # follow this pattern):
    #   <para>
    #     /the following references.../i (etc)
    #   </para>
    #   <orderedlist numeration='arabic'>
    #     <listitem>
    #       <para>
    #         name, title, hyperlink etc.
    #       </para>
    #     </listitem>
    #     ...
    #   </orderedlist>
    #   ...
    foreach my $para ($root->findnodes('.//para')) {
        my $value = $para->findvalue('.');
        if ($value =~ /$patt/i) {
            print STDERR "found introductory paragraph\n" if $verbose;
            # XXX step over text node; is this safe?
            my $list = $para->nextSibling();
            $list = $list->nextSibling();
            # XXX sometimes there's an empty para next
            if ($list->nodeName() eq 'para') {
                print STDERR "stepping over empty (?) para\n" if $verbose;
                $list = $list->nextSibling();
                $list = $list->nextSibling();
            }
            if ($list->nodeName() ne 'orderedlist' ||
                $list->findvalue('@numeration') ne 'arabic') {
                print STDERR "not followed by orderedlist/arabic\n"
                    if $verbose;
            } else {
                foreach my $para ($list->findnodes('.//para')) {
                    my $value =
                        description_fix(white_strip($para->findvalue('.')));
                    print STDERR "found reference: $value\n" if $verbose;
                    push @$references, $value;
                }
            }
        }
    }

    return $references;
}

# parse an inform table
sub parse_inform_table
{
    my ($info, $index) = @_;

    my $table = ($info->{table})[0];
    my $modnam = @{$define->{name}}[$index];

    print STDERR "parsing forced inform table\n" if $verbose;

    # table has single column, which is parameter name (full path)
    my $first = 1;
    foreach my $row ($table->findnodes('.//row')) {

	# skip header row
	if ($first) {
	    $first = 0;
	    next;
	}

	# get parameter path
	my $path = get_entry($row, 1, {black => 1});

        my ($curobj, $name) = ($path =~ /^(.*?)([^\.]*)$/);
        $curobj = name_fix($curobj, $modnam, 1);
        $name = name_fix($name, $modnam, 0);

        $inform_map->{$curobj}->{$name} = 1;
        print STDERR "added $curobj$name to forced inform map\n"
            if $verbose > 1;
    }
}

# parse a notify table
sub parse_notify_table
{
    my ($info, $index) = @_;

    my $table = ($info->{table})[0];
    my $modnam = @{$define->{name}}[$index];

    # arg0 (assuming the correct pattern in find_tables) will be the word that
    # precedes "Active Notification" int the table caption, i.e. "Forced",
    # "Default" or <other>.
    my $activeNotify = {Forced => 'forceEnabled',
                        Default => 'forceDefaultEnabled'}->{$info->{arg0}};
    $activeNotify = 'canDeny' unless $activeNotify;

    # table has single column which, depending on table, is either object /
    # parameter name or else parameter path
    my $first = 1;
    my $curobj = '';
    foreach my $row ($table->findnodes('.//row')) {

	# skip header row
	if ($first) {
	    $first = 0;
	    next;
	}

	# get object / parameter name or else parameter path
	my $path = get_entry($row, 1, {black => 1});

        # if it contains an "internal" dot but doesn't end with a dot,
        # it's a parameter path
        my $name = '';
        if ($path =~ /.\..*[^\.]$/) {
            ($curobj, $name) = ($path =~ /^(.*?)([^\.]*)$/);
            $curobj = name_fix($curobj, $modnam, 1);
        } elsif ($path =~ /\.$/) {
            $curobj = name_fix($path, $modnam, 1);
	} else {
            $name = $path;
        }

	# parameter
	if ($name) {
            $name = name_fix($name, $modnam, 0);
	    $notify_map->{$curobj}->{$name} = {activeNotify => $activeNotify,
                                               referenced => 0};
	    print STDERR "added $curobj$name to active notify map\n"
                if $verbose > 1;
	}
    }
}

# parse a model table
sub parse_model_table
{
    my ($inf, $ind, $pro) = @_;

    my $tab = $inf->{table};
    my $tab2 = $inf->{table2};

    print STDERR "parsing model table $ind\n" if $verbose;

    # column headings:
    # TR-069: Name, Type, Write, Read, Description
    # TR-098: Name, Type, Write, Description, Default
    # TR-106: Name, Type, Write, Description
    # WT-135: Name, Type, Write, Description, Default
    # WT-140: Name, Type, Write, Description, Default
    # WT-152: Name, Type, Write, Description, Default, Version
    # WT-153: Name, Type, Write, Description, Default, Version

    # column indices
    my $nam_col; # 1
    my $typ_col; # 2
    my $wrt_col; # 3
    my $red_col; # 4 (if present)
    my $dsc_col; # 4 (or 5 if Read present)
    my $def_col; # 5 (if present)
    my $ver_col; # 6 (if present)

    # start of XML
    my $mnm = @{$define->{name}}[$ind];
    my $ver = @{$define->{version}}[$ind];
    my $iss = $rootmodel ? '' : ' isService="true"';
    my $mod = qq{<model name="$mnm:$ver"$iss>};

    # remember current model, version, object and parameter
    # XXX should handle at outer level?
    # XXX would be nice to avoid global variables
    $current_model = $mnm;
    $current_version = $ver;
    $current_object = '';
    $current_parameter = '';

    # active objects
    my $act = [];

    # defined components
    my $com = [];

    # current object name (used only when maxnest is 0)
    my $cur = '';

    # if requested, output top-level 'model' element
    unless ($components) {
        output(1, $mod);
        output_model_parameter($act, $cur, $mnm.'NumberOfEntries',
                               'unsignedInt', '',
                               'Number of entries in '.$mnm.' table.', undef,
                               '1.0') unless $rootmodel;
    }

    # if requested, inject spurious top-level 'Device' object
    # XXX should really only do this if current_version is 1.0
    if ($device && $current_model eq 'Device') {
	$cur = $current_model . '.';
	output_model_object_start($act, $com, $cur, 0,
				  'The top-level object for a Device.',
				  $define->{version}[$ind]);
    }

    # process all rows
    my @rows = $tab->findnodes('.//row');
    push(@rows, $tab2->findnodes('.//row')) if $tab2;
    foreach my $row (@rows) {
        if (!$nam_col) {
            $nam_col = 1;
            $typ_col = 2;
            $wrt_col = 3;
            $red_col = 4 if get_entry($row, 4) =~ /Read/;
            $dsc_col = $red_col ? 5 : 4;
            $def_col = 5 if get_entry($row, 5) =~ /Default/;
            $ver_col = 6 if get_entry($row, 6) =~ /Version/;
            next;
        }
        my $nam = get_entry($row, $nam_col, {black => 1, blackwarn => 1});
        my $typ = get_entry($row, $typ_col, {black => 1});
        my $wrt = get_entry($row, $wrt_col);
        my $red = get_entry($row, $red_col);
        my $dsc = get_entry($row, $dsc_col, {noescape => 1, raw => 1});
        my $def = get_entry($row, $def_col);
        my $ver = get_entry($row, $ver_col, {default => $current_version});

	# we ignore the "Read" column, which is present only in old data model
        # definitions that pre-date profiles

	# fix fields
	$nam = name_fix($nam, $mnm, $typ =~ /object/i);
	$typ = type_fix($typ, $nam);
	$dsc = description_fix($dsc);
	$def = default_fix($def);
	$ver = version_fix($ver);

	# ignore entries with blank name or type, or type that doesn't start
        # with a word character
        # XXX sometimes ellipsis is used in the type field, e.g. in TR-111
	# XXX blank name and type has been seen with deleted rows, so this
	#     isn't necessarily a formatting error
	if (!$nam || !$typ || $typ !~ /^\w/) {
	    print STDERR "parse_model_table: empty name ($nam) or type " .
		"($typ); item ignored\n\tcontained within @$act\n"
		if $pedantic > 1;
	    next;
	}

	# default version to data model version
	$ver = $define->{version}[$ind] unless $ver;

	if ($typ =~ 'object') {
	    $cur = $nam = check_object_name($act, $nam);
	    output_model_object_start($act, $com, $nam, $wrt, $dsc, $ver);
            $current_object = $nam;
            $current_parameter = '';
        } else {
	    output_model_parameter($act, $cur, $nam, $typ, $wrt, $dsc, $def,
				   $ver);
            $current_parameter = $nam;
	}
    }

    # this will close any open objects
    check_object_name($act);

    # if collected components, output model now
    if ($components) {
	output(1, $mod);
	foreach my $c (@$com) {
            my $path = $c->{path} ? qq{ path="$c->{path}"} : qq{};
	    output(2, qq{<component$path ref="$c->{name}"/>});
	}
    }

    unless ($oldprofile) {
        foreach my $tab (@$pro) {
            parse_profile_table($tab, $ind);
        }
    }

    # end of model
    output 1, qq{</model>};
}

# check object name against currently active objects, closing any
# implicitly-closed objects and returning the last component of the name
# (for tables, this last component will be of the form "name{i}", with no
# dot)
sub check_object_name
{
    my ($active, $name) = @_;

    # allow name to be omitted (when called just to close active objects)
    $name = '' unless $name;

    # get components from name
    # ".{" is temporarily replaced with "{" for parsing simplicity
    #$name =~ s/\.\{/\{/g;
    my @comps = split /\./, $name;

    while (@$active && @$active >= @comps) {
	output_model_object_end($active);
    }

    # if maxnest is 1, must close all active objects (there will be only 1)
    if ($maxnest == 1) {
	while (@$active) {
	    output_model_object_end($active);
	}
    }

    # returned name is full path if maxnest is 0 or 1
    # change "{" back to ".{" before returning
    my $retname = @comps ? $comps[@comps-1] : '';
    $retname = $name if $maxnest <= 1;
    $retname =~ s/$/\./ if $maxnest > 1;
    #$retname =~ s/\{/\.\{/g;
    return $retname;
}

# output a model object definition
sub output_model_object_start
{
    my ($active, $comps, $name, $write, $description, $version) = @_;

    print STDERR "object $name\n" if $verbose;

    my ($deprecated, $obsoleted) = get_attributes($description);
    my $access = access_string($write);

    $description = xml_escape($description);
    $description = expand_references($description);

    my $cname = component_name($name);
    if ($components && $comps) {
	output @$active + 1, qq{<component name="$cname">};
	push @$comps, {name => $cname, path => path_name($active, $name)}; 
    }
    unless ($noobjects) {
	my ($tname, $ename) = object_name($name);
        my $minEntries = $ename ? '0' : '1';
        my $maxEntries = $ename ? 'unbounded' : '1';
        ($minEntries, $maxEntries) = modify_entries($minEntries, $maxEntries,
                                                    $description);
        my $needEntries = $maxEntries eq 'unbounded' ||
            ($maxEntries > 1 && $maxEntries > $minEntries);
        $minEntries = qq{ minEntries="$minEntries"};
        $maxEntries = qq{ maxEntries="$maxEntries"};
        my ($oname, $knames) = get_table_info($tname, $ename);
        my $numEntriesParameter = ($oname && $needEntries) ?
            qq{ numEntriesParameter="$oname"} : qq{}; 
	my $status = ($obsoleted eq 'true') ? qq{ status="obsoleted"} :
	    ($deprecated eq 'true') ? qq{ status="deprecated"} : qq{};
        my $ver = $dmr ? qq{ dmr:version="$version"} : qq{};
        my $after = ($dmr > 1 && $current_object) ?
            qq{ dmr:previousObject="$current_object"} : qq{};
	output @$active + 2, qq{<object name="$tname$ename" access="$access"$minEntries$maxEntries$numEntriesParameter$status$ver$after>};
	output @$active + 3, qq{<description>$description</description>};
	if (@$knames) {
            output @$active + 3, qq{<uniqueKey>};
            foreach my $key (@$knames) {
                output @$active + 4, qq{<parameter ref="$key"/>};
            }
            output @$active + 3, qq{</uniqueKey>};
	}
    }
    # XXX version does no harm here... but isn't used...
    push @$active, {name => $name, version => $version};

    # if maxnest is 0, must close just-opened object
    if ($maxnest == 0) {
	output_model_object_end($active);
    }
}

sub output_model_object_end
{
    my ($active) = @_;

    my $top = pop @$active;
    unless ($noobjects) {
        # XXX actually don't need version in $active now; using attribute
        #if ($dmr) {
        #    my $version = $top->{version};
        #    output(@$active + 3, qq{<dmr:version>$version</dmr:version>});
        #}
	output @$active + 2, qq{</object>};
    }
    if ($components) {
	output @$active + 1, qq{</component>};
    }

    # if requested, output a Services object at the top level
    my $vermap = {InternetGatewayDevice => '1.1', Device => '1.0'};
    if ($rootmodel && !@$active && !$output_services->{$current_model} &&
        $current_version eq $vermap->{$current_model}) {
	my $nam = $current_model . '.Services.';
	my $wrt = 0;
	my $dsc = 'This object contains general services information.';
	my $ver = $vermap->{$current_model};
	output_model_object_start($active, undef, $nam, $wrt, $dsc, $ver);
	$output_services->{$current_model} = 1;
    }
}

# return object path name from array of active objects and name of current object
sub path_name
{
    my ($active, $name) = @_;

    my $path = '';
    foreach my $item (@$active) {
        my $comp = $item->{name};
	$path .= $comp . '.';
    }

    # XXX can't we just stop here now?

    # object name will already have a trailing dot
    $path .= $name;

    # XXX this is the clunky bit; this is used when including components and
    #     so is the path within which the component is included, so the last
    #     name component must be removed
    #$path =~ s/\.\{/\{/g;
    $path =~ s/[^\.]+\.$//;
    #$path =~ s/{/\.\{/g;

    return $path;
}

# modify minEntries and maxEntries based on heuristic inspection of an
# object description
sub modify_entries {
    my ($minEntries, $maxEntries, $description) = @_;

    if ($description =~ /fixed,? with (exactly )?([0-9]+) entries/) {
        $minEntries = $2;
        $maxEntries = $2;
    }

    if ($minEntries eq '1' && $maxEntries eq '1' &&
        $description =~ /exclusive of any other.*object within.*instance/) {
        $minEntries = 0;
    }

    return ($minEntries, $maxEntries);
}

# parse the "keys" file, returning an appropriate hash
sub parse_keys_file
{
    my ($file) = @_;

    # do nothing unless the file is specified
    return {} unless $file;

    # parse the file
    my $parser = XML::LibXML->new();
    my $tree = $parser->parse_file($file);
    my $root = $tree->getDocumentElement;

    my $keys = {};
    foreach my $service ($root->findnodes('.//service')) {
	my $sname = $service->findvalue('@name');
	foreach my $table ($service->findnodes('table')) {
	    my $tname = $table->findvalue('@name');
	    my $index = $table->findvalue('@index');
	    $keys->{$sname}->{$tname}->{index} = $index;
	    foreach my $key ($table->findnodes('key')) {
		my $kname = $key->findvalue('@name');
		push @{$keys->{$sname}->{$tname}->{keys}}, $kname;
	    }
	}
    }

    return $keys;
}

# get table info
sub get_table_info
{
    my ($tname, $ename) = @_;

    # tname is the table object name and will end with parent object name,
    # period, table name and period, e.g. "...WANIPConnection.PortMapping."
    my @comps = split /\./, $tname;
    my $table = pop @comps;

    # parse out the parent object name (which corresponds to "service") and
    # the table name
    my $service = '';
    my $index = '';
    my $knames = [];
    if (@comps) {
	$service = pop @comps;
	$service = pop @comps if $service eq '{i}' && @comps;
	$index = $keys->{$service}->{$table}->{index};
	$knames = $keys->{$service}->{$table}->{keys};
	$knames = [] unless defined $knames;
    }

    # determine numEntriesParameter name
    my $oname = '';
    if ($ename) {
        $oname = $index ? $index : $table.'NumberOfEntries';

        # XXX apply heuristics to cover known cases where the naming
        #     conventions aren't followed (most of these are from IGD)
        $oname =~ s/ConditionalServingPool/ConditionalPool/;
        $oname =~ s/LAN(.+)InterfaceConfig/LAN$1Interface/;
        $oname =~ s/WANConnectionDevice/WANConnection/;
        $oname =~ s/WANConnectionService/ConnectionService/;

        # XXX this is wrong in TR-106 and right in WT-107
        $oname =~ s/RouteHopsNumberOfEntries/NumberOfRouteHops/ if
            $current_model eq 'Device';
    }

    return ($oname, $knames);
}

# output a model parameter definition
sub output_model_parameter
{
    my ($active, $curobj, $name, $typespec, $write, $description, $default,
	$version) = @_;

    return if $noparameters;

    # if maxnest is 0, must concatenate current object name and parameter name
    my $pname = ($maxnest == 0) ? $curobj.$name : $name;

    print STDERR "parameter $pname\n" if $verbose;

    ($description, my $list, my $values) = get_values($typespec, $description);
    # get attributes after values (so don't match OBSOLETED etc on values)
    my ($deprecated, $obsoleted) = get_attributes($description);
    my $typeinfo = get_typeinfo($name, $typespec, $description, $values);

    # XXX no longer written to XML, but keep the logic, just in case
    my $units = get_units($typeinfo, $description);
    my $hidden = true_false($typeinfo->{hidden});
    # XXX doesn't catch things like lists of MAC addresses
    my $access = access_string($write);
    my $activeNotify = '';
    $activeNotify = $notify_map->{$curobj}->{$name}->{activeNotify} if
        defined $notify_map->{$curobj}->{$name};
    $notify_map->{$curobj}->{$name}->{referenced} = 1 if $activeNotify;
    my $forcedInform = true_false($inform_map->{$curobj}->{$name});
    $inform_map->{$curobj}->{$name} = 2 if $forcedInform eq 'true';

    $description = expand_references($description);

    my $indent = @$active + ($noobjects ? 1 : 2);

    my $status = ($obsoleted eq 'true') ? qq{ status="obsoleted"} :
	($deprecated eq 'true') ? qq{ status="deprecated"} : qq{};
    $activeNotify = $activeNotify ? qq{ activeNotify="$activeNotify"} : qq{};
    $forcedInform = ($forcedInform eq 'true') ? qq{ forcedInform="true"} :qq{};
    my $ver = $dmr ? qq{ dmr:version="$version"} : qq{};
    my $after = ($dmr > 1 && $current_parameter) ?
        qq{ dmr:previousParameter="$current_parameter"} : qq{};

    output($indent, qq{<parameter name="$pname" access="$access"$status$activeNotify$forcedInform$ver$after>});

    output($indent + 1, qq{<description>$description</description>});

    $hidden = ($hidden eq 'true') ? qq{ hidden="true"} : qq{};
    # XXX no longer written to XML, but keep the logic, just in case
    #$units = ($units) ? qq{ units="$units"} : qq{};

    my $minInclusive =
	(defined $typeinfo->{minInclusive} && $typeinfo->{minInclusive} ne '')?
	qq{ minInclusive="$typeinfo->{minInclusive}"} : qq{};
    my $maxInclusive =
	(defined $typeinfo->{maxInclusive} && $typeinfo->{maxInclusive} ne '')?
	qq{ maxInclusive="$typeinfo->{maxInclusive}"} : qq{};

    # minLength and maxLength apply to the parameter only if it's not a list
    my $minLength = (!$list && defined $typeinfo->{minLength} &&
		     $typeinfo->{minLength} ne '') ?
		     qq{ minLength="$typeinfo->{minLength}"} : qq{};
    my $maxLength = (!$list && defined $typeinfo->{maxLength} &&
		     $typeinfo->{maxLength} ne '') ?
		     qq{ maxLength="$typeinfo->{maxLength}"} : qq{};

    my $end_element = ($minInclusive || $maxInclusive || $values) ? '' : '/';
    if ($oldsyntax) {
        $end_element = '' if $list;
    } else {
        $end_element = '' if !$list && ($minLength || $maxLength);
    }

    if ($oldsyntax) {
        my $dataType =
            (defined $typeinfo->{dataType} && $typeinfo->{dataType} ne '') ?
            qq{ ref="$typeinfo->{dataType}"} : qq{};
        output($indent + 1, qq{<syntaxOld$hidden>});
        output($indent + 2, qq{<$typeinfo->{type}$minLength$maxLength$dataType$end_element>});
    } else {
        my $baseref = $end_element ? 'ref' : 'base';
        output($indent + 1, qq{<syntax$hidden>});
        if ($list) {
            my $minLength = (defined $typeinfo->{minLength} &&
                             $typeinfo->{minLength} ne '') ?
                             qq{ minLength="$typeinfo->{minLength}"} : qq{};
            my $maxLength = (defined $typeinfo->{maxLength} &&
                             $typeinfo->{maxLength} ne '') ?
                             qq{ maxLength="$typeinfo->{maxLength}"} : qq{};
            if ($minLength || $maxLength) {
                output($indent + 2, qq{<list>});
                output($indent + 3, qq{<size$minLength$maxLength/>});
                output($indent + 2, qq{</list>});
            } else {
                output($indent + 2, qq{<list/>});
            }
        }
        if ($typeinfo->{dataType}) {
            output($indent + 2, qq{<dataType $baseref="$typeinfo->{dataType}"$end_element>});
        } else {
            output($indent + 2, qq{<$typeinfo->{type}$end_element>});
        }
        if ($minLength || $maxLength) {
            output($indent + 3, qq{<size$minLength$maxLength/>});
        }
    }

    if ($minInclusive || $maxInclusive) {
        output($indent + 3, qq{<range$minInclusive$maxInclusive/>});
    }

    if ($values) {
	foreach my $value (@$values) {
	    # XXX in theory can have access="readOnly"
	    my $status = ($value->{obsoleted} eq 'true') ?
		qq{ status="obsoleted"} : ($value->{deprecated} eq 'true') ?
		qq{ status="deprecated"} : qq{};
	    my $optional = ($value->{optional} eq 'true') ?
		qq{ optional="true"} : qq{};
            my $description = $value->{description};
            $description = expand_references($description);
            if ($oldenum) {
                # XXX should this be comment? no matter, since it's old
                $description = ($description ne '') ?
                    qq{ description="$description"} : qq{};
                output($indent + 3, qq{<value$status$optional$description>$value->{value}</value>});
            } elsif ($description) {
                output($indent + 3, qq{<enumeration value="$value->{value}"$status$optional>});
                output($indent + 4, qq{<description>$description</description>});
                output($indent + 3, qq{</enumeration>});
            } else {
                output($indent + 3, qq{<enumeration value="$value->{value}"$status$optional/>});
            }
	}
    }

    if ($oldsyntax) {
        output($indent + 2, qq{</$typeinfo->{type}>}) unless $end_element;
        my $minLength = ($list && defined $typeinfo->{minLength} &&
                      $typeinfo->{minLength} ne '') ?
                      qq{ minLength="$typeinfo->{minLength}"} : qq{};
        my $maxLength = ($list && defined $typeinfo->{maxLength} &&
                      $typeinfo->{maxLength} ne '') ?
                      qq{ maxLength="$typeinfo->{maxLength}"} : qq{};
        output($indent + 2, qq{<list$minLength$maxLength/>}) if $list;
        output($indent + 1, qq{</syntaxOld>});
        output($indent + 1, qq{<default type="object"/>})
            if defined($default) && $default eq '';
        output($indent + 1, qq{<default type="object">$default</default>})
            if defined($default) && $default ne '';
    } else {
        my $dataType = $typeinfo->{dataType} ? 'dataType' : $typeinfo->{type};
        output($indent + 2, qq{</$dataType>}) unless $end_element;
        output($indent + 2, qq{<default type="object" value="$default"/>})
            if defined($default);
        output($indent + 1, qq{</syntax>});
    }

    output($indent, qq{</parameter>});
}

# get enumerated values from the description (returns modified description),
# whether parameter is list-values, and array of values
# XXX expects XML escapes not to have been applied (to make it more convenient
#     to handle "<>" characters) but this isn't a good enough reason!
sub get_values
{
    my ($type, $indescr) = @_;

    # pattern that determines whether the value is a list rather than a single
    # value
    # XXX allow "coma separated" typo (PD-157v1)
    my $listpatt = '[Cc]omm?a[ -]separated';

    # XXX shouldn't really do the list bit here...
    my $list = $indescr =~ /$listpatt/;

    # don't even look for values unless it's of type string
    return (xml_escape($indescr), $list, undef) unless $type =~ /^string/;

    # patterns that are checked for each line (enumpatt introduces a list of
    # enumerated values; each value must then match quotpatt)
    # XXX some of the enumpatt values are rather specialized...
    my $enumpatt = 'enumeration:|enumeration of:|enumeration of one of the following:|enumeration of the following strings:|enumeration of the following status strings:|following enumerated list:|list of:|one of:|enumerated type|enumerated list|values are:|values will be:|from the list:|possible items are:|for annex [a-c]:';
    my $quotpatt = '^[\"<]';

    my $outdescr =  '';
    my $values = [];
    my $prefix = '';
    my $is_enum = 0;
    my $ever_enum = 0;
    my $added_values = 0;
    foreach my $line (split /\n/, $indescr) {
	my $was_enum = $is_enum;
	if ($is_enum) {
	    # ignore blank lines at start of list
	    next if @$values == 0 && $line eq '';
	    # XXX some of the quotpatt exclusions are rather specialized...
	    $is_enum = ($line =~ /$quotpatt/i &&
			$line !~ /11i.*SHOULD/);
	}
	if (!$is_enum) {
	    $is_enum = ($line =~ /$enumpatt/i);
	    $ever_enum = 1 if $is_enum;
	    # XXX the annex a test is _very_ specialized...
	    if ($is_enum && (@$values > 0 || $line =~ /^for annex a/i)) {
		$prefix = $line;
		$prefix =~ s/:$//;
		next;
	    }
	}
	if (!$was_enum || !$is_enum) {
	    if (!$added_values && @$values > 0) {
		$outdescr .= "{{enum}}\n";
		$added_values = 1;
	    }
	    $outdescr .= $line . "\n";
	} else {
	    # closing terminator is optional so its absence will be obvious
	    my @match = ($line =~ /[\"<]([^\">]*)[\">]?\s*\(?([^\)]*)\)?/);
	    my $value = {};
	    $value->{value} = defined($match[0]) ? $match[0] : '';
	    $value->{description} = defined($match[1]) ? $match[1] : '';
            # XXX this catches period after enum value
            $value->{description} = '' if $value->{description} eq '.';
	    $value->{value} = '<' . $value->{value} . '>' if $line =~ /^</;
	    $value->{description} =
                $prefix . ($value->{description}? ': ' : '') .
		$value->{description} if $prefix;
	    foreach my $attr (('optional', 'deprecated', 'obsoleted')) {
		my $ATTR = uc $attr;
		if ($value->{description} !~ /$ATTR/) {
		    $value->{$attr} = true_false(0);
		} else {
		    $value->{$attr} = true_false(1);
		    $value->{description} =~ s/\s*[\(,]?\s*$ATTR[,\)]?\s*//;
		}
	    }
	    $value->{deprecated} = 'true' if $value->{obsoleted} eq 'true';
	    $value->{value} = xml_escape($value->{value}, {attr => 1});
	    $value->{description} =
                xml_escape($value->{description}, {attr => 1});
	    push @$values, $value;
	}
    }

    # if never saw anything that suggested that this parameter has enumerated
    # values, behave as though we never even checked for them
    # XXX not sure that $ever_enum is useful (better just to check values?)
    return (xml_escape($indescr), $list, undef)
        unless $ever_enum && @$values;

    # don't want terminating newline in the description
    chop $outdescr;

    $list = $outdescr =~ /$listpatt/;
    return (xml_escape($outdescr), $list, $values);
}

# get deprecated and obsoleted attributes from object or parameter
# description
sub get_attributes
{
    my ($description) = @_;

    my $deprecated = ($description =~ /(parameter|object) is DEPRECATED/i);
    my $obsoleted = ($description =~ /(parameter|object) is OBSOLETED/);

    # XXX might not need these exclusions now
    $obsoleted = 0 if $description =~ /[Tt]he OBSOLETED values/;
    $obsoleted = 0 if $description =~ /cannot be OBSOLETED/;

    $deprecated = 1 if $obsoleted;

    return (true_false($deprecated), true_false($obsoleted));
}

# get typeinfo from typespec of one of the following forms
#   type                            (any types)
#   type[minInclusive:maxInclusive] (numeric types only)
#   type(minLength:maxLength)       (string only)
#   type(maxLength)                 (string only)
#
# also check the name and or description for indications of "hidden"
# parameters, such as passwords, and for data types (textual conventions),
# such as IP addresses
sub get_typeinfo
{
    my ($name, $typespec, $description, $values) = @_;

    my ($a, $b, $c, $d, $e) =
	($typespec =~ /\s*(\w+)\s*([\[\(]?)([\-\dkK]*)(:?)([\-\dkK]*)/);
    #                     ($a )   ($b     )($c )($d)($e )
    # $a is type name
    # $b is opening separator '[' or '(' 
    # $c is min value / length
    # $d is colon separator
    # $e is max value / length

    # deal with 'k' / 'K' suffices (could support more different ones?)
    $c = process_suffix($c) if $c;
    $e = process_suffix($e) if $e;

    # fix separator errors
    if ($a =~ /int$/i && $b eq '(') {
        $b = '[';
        print STDERR "get_typeinfo: $name: changed [] to ()\n";
    } elsif ($a =~ /^(base64|string)$/ && $b eq '[') {
        $b = '(';
        print STDERR "get_typeinfo: $name: changed () to []\n";
    }

    my $typeinfo = {};
    $typeinfo->{type} = (defined $values && @$values && $oldenum) ?
        'enumeration' : type_fix($a);
    $typeinfo->{minInclusive} = $c if $b eq '[';
    $typeinfo->{maxInclusive} = $e if $b eq '[';
    $typeinfo->{minLength} = $c if $b eq '(' && $d;
    $typeinfo->{maxLength} = $c if $b eq '(' && !$d;
    $typeinfo->{maxLength} = $e if $b eq '(' && $d;

    # convert base64 length from encoded to actual
    if ($typeinfo->{type} eq 'base64' && $typeinfo->{maxLength}) {
        my $omin = $typeinfo->{minLength} ? $typeinfo->{minLength} : 0;
        my $omax = $typeinfo->{maxLength};
        my $nmin = $omin / 4 * 3;
        my $nmax = $omax / 4 * 3;
        print STDERR "get_typeinfo: $name: base64($omin:$omax) -> ".
            "($nmin:$nmax) (might be ".($nmax-1)." or ".($nmax-2).")\n" if
            $pedantic;
        $typeinfo->{minLength} = $nmin;
        $typeinfo->{maxLength} = $nmax;
    }

    $typeinfo->{hidden} = 1
	if $description =~ /when read.*returns an empty string|reading.*must always return.*empty/i;

    # XXX PVC test roots out DestinationAddress in WANDSLConnectionManagement
    $typeinfo->{dataType} = 'IPAddress'
	if $typeinfo->{type} eq 'string' && $name =~ /(dest|destination|group|host|ip|source|src|server|subnet).*(addr|address|ip|mask)$/i && $description !~ /(PVC:)|(host name)/i;
    $typeinfo->{dataType} = 'MACAddress'
	if $typeinfo->{type} eq 'string' && $name =~ /mac.*(addr|address)$/i;
    $typeinfo->{type} = 'dataType' if $typeinfo->{dataType};

    return $typeinfo;
}

# get units from typeinfo and description
# XXX is very heuristic and is not bullet-proof
sub get_units
{
    my ($typeinfo, $description) = @_;

    return '' unless $typeinfo->{type} =~ /int|unsigned/;

    return 'Kbps' if $description =~ /kbps/i;
    return 'Mbps' if $description =~ /mbps/i;
    return 'cells' if $description =~ /cells/i;
    return 'bytes' if $description =~ /bytes/i;
    return 'packets' if $description =~ /packets/i;
    return 'seconds' if $description =~ /seconds/i;

    return '';
}

# generate actual component name from an object name
sub component_name
{
    my ($name) = @_;

    $name =~ s/^Device\.//
	unless $name eq 'Device.';
    $name =~ s/^InternetGatewayDevice\.//
	unless $name eq 'InternetGatewayDevice.';

    $name =~ s/\./_/g;
    $name =~ s/\{i\}/i/g;
    $name =~ s/_$//;

    $name =~ s/$/_object/ if $noparameters;
    $name =~ s/$/_params/ if $noobjects;

    if (exists $components_map->{$name}) {
        my $i = 0;
        while (exists $components_map->{$name.'_'.$i}) {
            $i++;
        }
        $name .= '_'.$i;
    }
    $components_map->{$name} = 1;

    return $name;
}

# generate actual object name from an object name; if the object is a table,
# then both the table name and its entries' name are returned
sub object_name
{
    my ($name) = @_;

    $name =~ s/\.\{/\{/g;

    my @comps = split /\./, $name;
    my $lname = pop @comps;

    $lname =~ s/\{/\.\{/g;
    my ($tname, $ename) = split /\./, $lname;
    $tname = '' unless defined $tname;
    $ename = '' unless defined $ename;

    # XXX seems to be a problem with pname; if ever used will be two dots?
    my $pname = $components ? '' : join '.', @comps;
    $pname =~ s/\{/\.\{/g;

    $pname .= '.' if $pname ne '';
    $tname .= '.' if $tname ne '';
    $ename .= '.' if $ename ne '';
    return ($pname.$tname, $ename);
}

# parse a profile table
my $profiles = {};
sub parse_profile_table
{
    my ($info, $index) = @_;

    my $table = $info->{table};
    my $name = $info->{arg0};
    my $prover = $info->{arg1};

    # XXX profile version will be empty for profiles that have more than one
    #     version; for now assume 1
    $prover = '1' unless $prover;

    print STDERR "parsing profile table $name\n" if $verbose;

    # XXX this is messy... have I broken it? works for TR-143 now
    my $modnam = @{$define->{name}}[$index];
    my @modsup = ();
    push @modsup, $info->{arg3} if $info->{arg3}; 
    push @modsup, $info->{arg5} if $info->{arg5}; 
    print STDERR "modnam=$modnam modsup=@modsup\n" if $verbose;
    return unless grep {$modnam} @modsup;

    # XXX model minor version isn't necessarily the model's minor version, it
    #     might be less (need to either extract from Word or else derive by
    #     looking at the contained objects and parameters); THIS IS IMPORTANT
    my $modver = @{$define->{version}}[$index];
    my ($modmaj, $modmin) = split /\./, $modver;

    # if the profile version is specified, it must be 1 (the supplied profile
    # version is no longer used; versions 1, 2 etc are assumed to be defined
    # table columns 2, 3 etc)
    print STDERR "profile version in $name:$prover is not 1\n"
        if $prover && $prover != 1;

    # inspect the first row to determine the number of profile versions
    my @nodes = $table->findnodes('.//row[1]/*');
    my $numpro = @nodes - 1;
    print STDERR "$numpro profile version(s)\n" if $verbose;

    # avoid re-definition
    my $label = qq{model: $modnam:$modmaj.$modmin profile: $name:$prover};
    if (defined $profiles->{$label}) {
        print STDERR "$label already defined\n";
        return;
    }
    $profiles->{$label} = 1;

    # generate XML for each profile version separately
    # XXX should use base for version 2, 3 etc; also extends
    my $i = $oldsyntax ? 0 : 1;
    for ($prover = 1; $prover <= $numpro; $prover++) {
        print STDERR "parsing profile version $prover\n" if $verbose;
    
        # start of XML
        my $model = $oldprofile ? qq{ model="$modnam:$modmaj.$modmin"} : qq{};
        output $i+1, qq{<profile name="$name:$prover"$model>};

        # first column is object/parameter name; nth column is requirement for
        # profile version n-1.
        my $first = 1;
        my $active = 0;
        foreach my $row ($table->findnodes('.//row')) {

            # skip header row
            # XXX could extract profile version from column heading
            if ($first) {
                $first = 0;
                next;
            }

            # get object / parameter name and requirement
            my $name = get_entry($row, 1, {black => 1});
            my $requirement = get_entry($row, $prover + 1, {black => 1});

            # ignore entries with blank name
            # XXX blank name has been seen with deleted rows
            next unless $name;

            # remove leading period, if present
            $name =~ s/^\.//;

            # fix typos
            my $object = ($name =~ /\./);
            $name = name_fix($name, $modnam, $object);

            # output row(s)
            if ($object) {
                print STDERR "C|A|D without P for $name in profile\n"
                    if ($requirement =~ /C|A|D/ && $requirement !~ /P/);
                # check for "C" first, because sometimes see "PC"
                $requirement = ($requirement =~ /C/) ? "createDelete" :
                    ($requirement =~ /A/) ? "create"  :
                    ($requirement =~ /D/) ? "delete"  :
                    ($requirement =~ /P/) ? "present" : "notSpecified";
                output($i+2, qq{</object>}) if $active;
                output($i+2, qq{<object ref="$name" requirement="$requirement">});
                $active = 1;
            } elsif ($requirement && $requirement !~ /-/) {
                $requirement =
                    ($requirement =~ /W/) ? "readWrite" : "readOnly";
                output($i+3, qq{<parameter ref="$name" requirement="$requirement"/>});
            }

            # XXX need also to extract footnote requirements
        }
        output($i+2, qq{</object>}) if $active;

        # end of XML
        output $i+1, qq{</profile>};
    }
}

# parse (bibliographic) references (an array of textual, unparsed, references)
sub parse_references
{
    my ($references) = @_;

    # do nothing if no references
    return unless @$references;

    # hyperlink pattern
    my $hyperpatt = 'https?:\/\/';

    # date pattern
    my $datepatt = '((jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\w*)?\s*(\d+)?[,\s]*(19[7-9][0-9]|20[0-9]{2,2})';

    # start of XML
    output 1, qq{<bibliography>};

    # each reference is an unparsed string; do the best possible by assuming
    # it's a comma-separated list and applying some heuristics
    # XXX it's not in the XML, but assume that the references are numbered 1,
    #     2, 3 etc in the Word doc
    my $ctr = 1;
    foreach my $value (@$references) {
        my $name = '';
        my $title = '';
        my $organization = '';
        my $category = '';
        my $date = '';
        my $hyperlink = '';

        # before splitting, tidy up a little to avoid spurious parse:
        # - XML escape
        # - get rid of commas in dates
        # - get rid of trailing period
        $value = xml_escape($value);
        $value =~ s/([0-9]+),(\s*[0-9]+)/$1$2/g;
        $value =~ s/, (revision|version)/ $1/i;
        $value =~ s/\.$//;
        my @fields = split /\s*,\s*/, $value;

        # if last field looks like a hyperlink, handle it first
        $hyperlink = pop @fields if $fields[@fields-1] =~ /$hyperpatt/i;
        $hyperlink =~ s/earlier version\s+//;
        $hyperlink =~ s/available (at|from)\s+//;

        # if last field looks like a date, handle it next
        $date = pop @fields if $fields[@fields-1] =~ /$datepatt/i;

        # assign the remaining fields somewhat arbitrarily
        $name = shift @fields;
        $title = shift @fields;
        $organization = shift @fields;
        $category = shift @fields;
        if (@fields) {
            my $excess = join ', ', @fields;
            print STDERR "excess fields in reference ignored: $excess\n";
        }

        # if don't have name (which is mandatory) but do have date, promote
        # date to name
        if (!$name && $date) {
            $name = $date;
            $date = '';
        }

        # if name appears to contain a sentence or phrase ending, take the
        # first part as the name and the second as the title
        if (my @match = ($name =~ /(.*)[:\.]\s+(.*)/)) {
            ($name, $title) = @match;
        }

        # fix known typos in the name
        $name = reference_name_fix($name, $title);

        # override category heuristically
        $category = 'RFC' if $value =~ /RFC/;
        $category = 'IEEE' if $value =~ /IEEE/;
        $category = 'TR' if $value =~ /(TR-|Technical Report)/;

        # set organization from category or hyperlink
        $organization = 'BBF' if $category && $category eq 'TR';
        $organization = 'IETF' if $category && $category eq 'RFC';
        $organization = 'IEEE' if $category && $category eq 'IEEE';
        $organization = 'W3C' if $hyperlink && $hyperlink =~ /w3\.org/;

        # override organization heuristically
        # XXX might not need this any more
        $organization = 'BBF' if
            $organization && $organization =~ /(Broadband|DSL) Forum/;
        
        # derive id from name, applying heuristics
        my $id = $name;
        $id =~ s/\([^\}]*\)//g;
        $id =~ s/Amendment/-A-/i;
        $id =~ s/Corrigendum/-C-/i;
        $id =~ s/Issue/-I-/i;
        $id =~ s/Version/-V-/i;
        $id =~ s/Blue/BLUE/i;
        $id =~ s/(A New Class of Active )?Queue Management( Algorithms)?//i;
        $id =~ s/CWMP Data Model (Definition )?(XML )?Schema/DM SCHEMA/;
        $id =~ s/Extensible Markup/Xtensible Markup/i;
        $id =~ s/IEEE Std//i;
        $id =~ s/ITU-[A-Z] Recommendation//i;
        $id =~ s/References?//i;
        $id =~ s/Schema Part/SCHEMA/i;
        $id =~ s/Specifications?//i;
        $id =~ s/SSL Protocol/SSL/;
        $id =~ s/Wi-Fi/W/;
        $id =~ s/Setup Specification/Setup/;
        $id =~ s/XML-Sig.*/XMLSIG/;
        $id =~ s/The //i;
        $id =~ s/[a-z]//g;
        $id =~ s/[:,].*//g;
        $id =~ s/#//g;
        $id =~ s/\s//g;
        $id =~ s/-A-/a/;
        $id =~ s/-C-/c/;
        $id =~ s/-I-/i/;
        $id =~ s/-V-/v/;
        print STDERR "* REF: $name -> $id\n" if $verbose;

        # reject known bad IDs
        next if $id =~ /^(-+|DMSCHEMA|I|QB|QBM|RPCM)$/;

        # reject duplicate IDs
        # XXX probably these will be because of mis-parsing,
        #     which has been seen to find the references twice
        next if $bibref_ids->{$id};

        output 2, qq{<reference id="$id">};
        output 3, qq{<name>$name</name>} if $name;
        output 3, qq{<title>$title</title>} if $title;
        output 3, qq{<organization>$organization</organization>} if
            $organization;
        output 3, qq{<category>$category</category>} if $category;
        output 3, qq{<date>$date</date>} if $date;
        foreach my $link (split /\s+or\s+/, $hyperlink) {
            output 3, qq{<hyperlink>$link</hyperlink>};
        }
        output 2, qq{</reference>};

        $bibref_ids->{$id} = 1;
        $bibref_map->{$ctr} = $id;
        $ctr++;
    }

    # end of XML
    output 1, qq{</bibliography>};
}

# fix known typos in reference names
sub reference_name_fix
{
    my ($name, $title) = @_;

    my $orig = $name;

    $name =~ s/G.992.1/G.991.2/g if $name =~ /G.992.1/ && $title =~ /SHDSL/;

    print STDERR "reference_name_fix: $orig -> $name\n" if
        $pedantic and $name ne $orig;

    return $name;
}

# expand references (assume of the form "[n]"), converting to {{bibref}}
sub expand_references
{
    my ($value) = @_;

    while (my ($n) = ($value =~ /\[(\d+)\]/)) {
        my $id = $bibref_map->{$n};
        if ($id) {
            $value =~ s/\[$n\]/{{bibref|$id}}/;
        } else {
            $value =~ s/\[$n\]/\[#$n#\]/;
        }
    }
    $value =~ s/\[#(\d+)#\]/[$1]/g;
    return $value;
}

# return the specified entry from the given row; last argument is a reference
# to a hash of processing options
sub get_entry
{
    my ($row, $col, $opts) = @_;

    my $func = $opts->{raw} ? 'string' : 'normalize-space';
    my $default = defined($opts->{default}) ? $opts->{default} : '';
    my $value = $col ? $row->findvalue("$func(entry[$col])") : $default;

    # conditionally escape characters that are special to XML (sometimes want
    # to defer this)
    $value = xml_escape($value) unless $opts->{noescape};

    # always strip white space (pass extra options)
    # XXX sometimes would like to retain leading white space, since could then
    #     treat specially, e.g. verbatim (trouble is that so much of it is
    #     spurious)
    $value = white_strip($value, $opts);

    return $value;
}

# remove DOCTYPE element from XML file, returning remaining XML as a string
sub remove_doctype
{
    my ($file) = @_;

    my $string = '';

    open(FILE, $file) or die "Can't open $file: $!\n";

    my $doctype = 0;
    foreach $_ (<FILE>) {
	if (/\<\!DOCTYPE/) {
	    $doctype = 1;
	}
	if (!$doctype) {
	    $string .= $_;
	}
	if ($doctype && /dtd\"\>/) {
            $doctype = 0;
        }
    }

    return $string;
}

# fixes typos in names
sub name_fix
{
    my ($name, $modnam, $object) = @_;

    my $orig = $name;

    $name =~ s/,$/\./; # WT-135v6.3
    $name =~ s/\.\./\./g; # WT-140v5.2

    # XXX these ones suppressed because they break TR-106
    #$name =~ s/^Device.LAN.{i}.//; # dsl2006.629.00
    #$name =~ s/^Device.LAN.//; # dsl2006.629.00

    $name =~ s/LANDevice\{i\}/WANDevice\.\{i\}/;
    $name =~ s/WANDevice\{i\}/WANDevice\.\{i\}/;

    $name =~ s/profile\{/Profile.\{/; # dsl2006.629.00
    $name =~ s/-//g; # WT-107v12
    $name =~ s/\([0-9]*\)//; # WT-107v12
    $name =~ s/EMailService/EmailService/; # PD-158v1.1

    # ETSI services
    $name =~ s/^\&lt;rootdevice\&gt;\.Services\.//;
    $name =~ s/[\(\{]i[\}\)]/\{i\}/g;

    # XXX remove footnote references (want to process these really)
    $name =~ s/\[\d+\]$//;

    if ($object) {
	$name =~ s/$/\./ if $name !~ /\.$/;
    }

    print STDERR "name_fix: $orig -> $name\n" if $pedantic and $name ne $orig;

    # don't complain about most other changes

    if ($object && $rootmodel) {
        my $root = $current_model;
        my $oroot = $current_model eq 'Device' ?
            'InternetGatewayDevice' : 'Device';
        if ($name =~ /^\./) {
            $name = $root . $name;
        } else {
            $name = $root . '.' . $name if $name !~ /^($root|$oroot)\./;
        }
    }

    if ($object) {
	$name =~ s/^\.// if $name =~ /^\./;
    }

    if ($object && !$components && $name !~ /^\.?$modnam\./) {
        print STDERR "name_fix: model name ($modnam) mismatch: $name\n";
    }

    return $name;
}

# fixes typos in types
sub type_fix
{
    my ($type, $name) = @_;

    return $type unless $type;

    my $orig = $type;

    $type =~ s/Base64/base64/; # WT-107
    $type =~ s/^bool$/boolean/; # TR-069
    $type =~ s/Boolean/boolean/; # WT-140
    $type =~ s/char/string/; # dsl2006.347.00
    $type =~ s/datetime/dateTime/; # WT_196_pre-V100_final
    $type =~ s/Integer/int/; # WT-140
    $type =~ s/Int/int/; # WT-107
    $type =~ s/^objec$/object/; # dsl2006.347.00
    $type =~ s/Object/object/; # WT-140
    $type =~ s/String/string/; # WT-140
    $type =~ s/usignedint/unsignedInt/; # WT-107
    $type =~ s/unsignedint/unsignedInt/; # TR-104
    $type =~ s/UnsignedInt/unsignedInt/; # WT-135
    $type =~ s/Unsignedint/unsignedInt/; # WT-107
    $type =~ s/Uint/unsignedInt/; # WT-143

    $type =~ s/Long/long/;

    $type =~ s/component/object/;

    $type = 'object' if (!$type && $name =~ /WLANStation/); # dsl2006.629.00
    $type = 'object' if (!$type && $name =~ /UserGroup/); # WT-140v5.1

    print STDERR "type_fix: $name: $orig -> $type\n"
	if $pedantic and $type ne $orig;

    return $type;
}

# fixes non-ASCII characters and typos in descriptions
sub description_fix
{
    my ($description) = @_;

    my $orig = $description;

    # XXX should do to all fields, not just description?
    $description =~ s/\N{NO-BREAK SPACE}/ /g;
    $description =~ s/\N{NARROW NO-BREAK SPACE}/ /g;
    $description =~ s/\N{ZERO WIDTH NO-BREAK SPACE}/ /g;
    $description =~ s/\N{EN DASH}/-/g;
    $description =~ s/\N{EM DASH}/-/g;
    $description =~ s/\N{HORIZONTAL ELLIPSIS}/\.\.\./g;
    $description =~ s/\N{LEFT SINGLE QUOTATION MARK}/\`/g;
    $description =~ s/\N{RIGHT SINGLE QUOTATION MARK}/\'/g;
    $description =~ s/\N{LEFT DOUBLE QUOTATION MARK}/\"/g;
    $description =~ s/\N{RIGHT DOUBLE QUOTATION MARK}/\"/g;

    # XXX replace selected Greek letters
    $description =~ s/\N{GREEK SMALL LETTER MU}/u/g;
    $description =~ s/\N{MICRO SIGN}/u/g;

    # XXX replace selected accents;
    #     ref http://www.electriceditors.net/langline/accents.php#ascii
    $description =~ s/\N{LATIN SMALL LETTER U WITH GRAVE}/u\\/g;

    # XXX ignore other common non-ASCII punctuation
    $description =~ s/\N{SECTION SIGN}/section/g;

    # XXX ignore bullets at the end of lines
    $description =~ s/\s*\N{BULLET}\s*\n/\n/g;
    $description =~ s/\n\N{BULLET}\s*/\n/g;
    $description =~ s/\s*\N{BULLET}\s*/\n/g;

    # XXX this breaks "OnDemand" (fix when original problem re-arises)
    #$description =~ s/\"On[^\"](\s)/\"On\"/;
    $description =~ s/\"Basicand11i\'/\"Basicand11i\"/;
    $description =~ s/\"M2x-M\'/\"M2x-M\"/;
    $description =~ s/One of;/One of:/;
    $description =~ s/:XFS\"/\"XFS\"/;
    $description =~ s/On-Pwd /On-Pwd\" /;

    # extra terminating quote, e.g. "word"" -> "word"
    $description =~ s/\"([^\"]+)\"\"/\"$1\"/;

    $description =~ s/Valid values will be :/Valid values will be:/;
    $description =~ s/enumeration for/enumeration of/i;
    $description =~ s/enumerate type/enumerated type/i;

    # XXX this can happen if there are footnote references
    $description =~ s/enumeration\[?[0-9]+\]? /enumeration /;
    $description =~ s/unique\[?[0-9]+\]? /unique /; # TR-140

    print STDERR "description_fix: $orig -> $description\n"
	if $pedantic > 2 and $description ne $orig;

    return $description;
}

# fixed typos in default values
sub default_fix
{
    my ($default) = @_;

    my $orig = $default;

    $default =~ s/\N{EN DASH}//g;
    $default =~ s/\N{EM DASH}//g;

    $default =~ s/\N{LEFT DOUBLE QUOTATION MARK}/\"/g;
    $default =~ s/\N{RIGHT DOUBLE QUOTATION MARK}/\"/g;

    # these are common typos
    $default =~ s/^False$/false/;
    $default =~ s/^True$/true/;

    # XXX not exactly a fix, but remove quotes from strings (report tool can
    #     add them back if it wants to)
    $default =~ s/\"//g;

    # a default of "" is probably meant to be "-"
    $default = '-' if $default eq '';

    # a default of "<Empty>" means "empty default" (get rid of it)
    $default =~ s/\&lt;Empty\&gt;//;

    # a default starting "-[" is probably a footnote reference
    $default =~ s/^-\[.*/-/;

    print STDERR "default_fix: $orig -> $default\n"
	if $pedantic > 2 and $default ne $orig;

    # a default of "-" indicates "no default" (undefine the value)
    undef $default if $default =~ /^-$/;

    return $default;
}

# fixes typos in version numbers
sub version_fix
{
    my ($version) = @_;

    my $orig = $version;

    $version =~ s/-//; # dsl2006.859.00

    print STDERR "version_fix: $orig -> $version\n"
	if $pedantic and $version ne $orig;

    return $version;
}

# escape characters that are special to XML
sub xml_escape {
    my ($value, $opts) = @_;

    $value =~ s/\&/\&amp;/g;
    $value =~ s/\</\&lt;/g;
    $value =~ s/\>/\&gt;/g;

    # only quote quotes in attribute values
    $value =~ s/\"/\&quot;/g if $opts->{attr};

    return $value;
}

# strip leading and trailing white space and, optionally, other space
sub white_strip
{
    my ($string, $opts) = @_;

    # always remove leading and trailing white space
    $string =~ s/^\s*//g;
    $string =~ s/\s*$//g;

    # also any spaces or tabs after newlines
    $string =~ s/\n[ \t]*/\n/g;

    # optionally collapse multiple spaces
    $string =~ s/\s+/ /g if $opts->{collapse};

    # optionally remove all white space
    if ($opts->{black}) {
	my $orig = $string;
	$string =~ s/\s+//g;
	print STDERR "white_strip: had to remove extra spaces in $orig\n" if
	    $pedantic && $opts->{blackwarn} && $string ne $orig;
    }

    return $string;
}

# process suffix in string like "32k" to return valid number
# also allow trailing "-n"
sub process_suffix
{
    my ($value) = @_;
    
    my ($num, $mult, $delt) = $value =~ /(-?\d+)([kK]?)-?(\d*)/;

    $mult = $mult ? 1024 : 1;
    $delt = 0 unless defined $delt && $delt ne '';

    $value = $num * $mult - $delt;
    
    #if ($value =~ /k$/i) {
    #	chop $value;
    #	$value = $value * 1024;
    #}

    return $value;
}

# return string "readOnly" or "readWrite" based on argument
sub access_string
{
    my ($write) = @_;

    # XXX R, O and C are from old data models that pre-date profiles
    return ($write =~ /R|O|C|W/) ? "readWrite" : "readOnly";
}

# return string "true" or "false" based on argument
sub true_false
{
    my ($string) = @_;

    return $string ? "true" : "false";
}

# documentation
=head1 NAME

B<tr2dm.pl> - generate TR-069 data model definition from DocBook XML

=head1 SYNOPSIS

B<tr2dm.pl> [--components] [--define name=value]... [--device] [--dmr[=i(1)]] [--help] [--maxnest=i(1)] [--nomodels] [--noobjects] [--noparameters] [--nooutput] [-notypes] [--oldenum] [--oldprofile] [--oldsyntax] [--pedantic[=i(1)]] [--rootmodel] [--spec=<spec>] [--verbose[=i(1)]]
docbook-xml-file...

=head1 DESCRIPTION

The files specified on the command line are assumed to be DocBook XML files, as generated by I<antiword>, e.g.

antiword -x db I<data-model>.doc >I<data-model>.xdb

where I<data-model>.doc is a DSLF TR or WT that defines a TR-069 data model using the traditional Word tables.

The "xdb" (B<XML> B<D>oc B<B>ook) extension is just a convention, but allows the script output to use the "xml" extension.

The script searches for "tables of interest", meaning data model definitions, notify requirements, and profile definitions.

It then generates XML output compliant with the CWMP data model XML Schema; sends it to I<stdout>.

=head1 OPTIONS

=over

=item B<--components>

generates a component for each object and uses them when generating the I<model> element (B<noobjects> controls whether each component will include an I<object> element and B<noparameters> controls whether each component will include I<parameter> elements)

=item B<--define name=value>

specifies (name,value) pairs to be used in data model generation; values of the form "a+b+c" are treated as arrays of the form [a,b,c] with the first items applying to the first data model, the second to the second, etc; currently supports the following names:

=over

=item B<name>

data model name (defaults to "unknown")

=item B<version>

data model version (defaults to "1.0")

=back

=item B<--device>

requests insertion of top-level I<Device> object, necessary because TR-106 doesn't define one (appropriate only for TR-106)

=item B<--dmr>

include attributes and elements from the I<Data Model Report> (I<dmr>) namespace, which are intended to assist in report generation (if a value is given, the larger the number the more is included)

=item B<--help>

requests output of usage information

=item B<--maxnest=i(1)>

specifies maximum nesting level of object and parameter definitions

0 causes all object and parameter definitions to be at the top level and to have fully-qualified names

1 (default) causes all object definitions to be at the top level (with fully-qualified names), and all parameters to be nested within their objects

any other value causes fully-nested object and parameter definitions

=item B<--noobjects>

suppresses generation of models (and therefore profiles)

=item B<--noobjects>

suppresses generation of I<object> element (used in conjuction with B<components>)

=item B<--nooutput>

suppresses output of the data model definition to I<stdout> (useful if you want to see error messages or verbose output)

=item B<--noparameters>

suppresses generation of I<parameter> elements (used in conjuction with B<components>)

=item B<--notypes>

suppresses generation of data type definitions

=item B<--oldenum>

generate old-style enumerations

=item B<--oldprofile>

generate old-style profiles (outside model element)

=item B<--oldsyntax>

generate old-style parameter syntax

=item B<--pedantic=[i(1)]>

enables output of warnings to I<stderr> when errors in the input have to be
fixed; if the option is specified without a value, the value defaults to 1

=item B<--rootmodel>

requests insertion of a I<Services> object, appropriate only for root data models, i.e. TR-106 and TR-098

=item B<--spec=<spec>>

specifies the specification name, e.g. I<tr-098-1-1>, where the numbers are major and minor document version, or issue and amendment, depending on document type, with appropriate defaulting rules; if not already a URN, I<urn:broadband-forum-org:> is prefixed

=item B<--verbose=[i(1)]>

enables sending of verbose output to I<stderr>; if the option is specified without a value, the value defaults to 1

=back

=head1 LIMITATIONS

It is necessary to use a patched version of I<antiword> to avoid problems with table rows (details to follow).  Even the patched version cannot always be trusted to find all definitions (sometimes it arbitrarily omits or duplicates table rows).  If I<antiword> problems cannot be addressed, an alternative method will have to be used, e.g. extracting tables into Excel and exporting as XML, or direct export of the Word document as Excel.

The script doesn't yet extract all the information from the Word file, or interpret it to the extent possible, e.g. it doesn't yet attempt to process parameter descriptions in order to generate a list of enumerated values, and it doesn't process footnote text.

=cut
